library(tidyverse)
## -- Attaching packages ----------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts -------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Load the tab-separated 23andMe export; the first row supplies column names.
SNPs <- read.table(
  file = "23andMe_complete.txt",
  sep = "\t",
  header = TRUE
)
### Exercise 1
# Turn chromosome into an ordered factor with levels 1-22, X, Y, MT so the
# bars on the x-axis follow that sequence.
chromosome_levels <- c(1:22, "X", "Y", "MT")
SNPs$chromosome <- factor(
  SNPs$chromosome,
  levels = chromosome_levels,
  ordered = TRUE
)

# Bar chart counting the rows (SNPs) recorded for each chromosome.
ggplot(data = SNPs, mapping = aes(x = chromosome)) +
  geom_bar(colour = "blue", fill = "blue") +
  ggtitle("Total SNPs for each Chromosome")

### Exercise 2
# Fill colour per genotype label: two-letter A/C/G/T genotypes are red,
# single-letter A/C/G/T genotypes are blue, the D/I codes and "--" are green.
genotype_colours <- c(
  "AA" = "red", "CC" = "red", "GG" = "red", "TT" = "red", "AC" = "red",
  "AG" = "red", "AT" = "red", "CG" = "red", "CT" = "red", "GT" = "red",
  "A" = "blue", "C" = "blue", "G" = "blue", "T" = "blue",
  "D" = "green", "DD" = "green", "DI" = "green", "II" = "green",
  "I" = "green", "--" = "green"
)

# Stacked bar chart of SNP counts per chromosome, coloured by genotype.
ggplot(data = SNPs, mapping = aes(x = chromosome, fill = genotype)) +
  geom_bar() +
  labs(
    title = "Total Number of SNPs for each Chromosome",
    x = "Chromosome Number",
    y = "Number of SNPs"
  ) +
  scale_fill_manual(values = genotype_colours)

### Exercise 3
# Fill colour per genotype label: two-letter A/C/G/T genotypes are red,
# single-letter A/C/G/T genotypes are blue, the D/I codes and "--" are green.
genotype_colours <- c(
  "AA" = "red", "CC" = "red", "GG" = "red", "TT" = "red", "AC" = "red",
  "AG" = "red", "AT" = "red", "CG" = "red", "CT" = "red", "GT" = "red",
  "A" = "blue", "C" = "blue", "G" = "blue", "T" = "blue",
  "D" = "green", "DD" = "green", "DI" = "green", "II" = "green",
  "I" = "green", "--" = "green"
)

# Side-by-side (dodged) bars of SNP counts per chromosome and genotype.
snp_dodge_plot <- ggplot(SNPs, aes(x = chromosome, fill = genotype)) +
  geom_bar(position = "dodge2") +
  labs(
    title = "Total Number of SNPs for each Chromosome",
    x = "Chromosome Number",
    y = "Number of SNPs"
  ) +
  scale_fill_manual(values = genotype_colours)

# Write the plot to a 6 x 6 inch PNG at 300 pixels per inch.
ppi <- 300
png("SNP_plot.png", width = 6 * ppi, height = 6 * ppi, res = ppi)
# A ggplot object is only drawn when it is printed. Top-level auto-printing
# does not happen under source() or inside functions/loops, which would leave
# the PNG empty, so print explicitly while the png device is open.
print(snp_dodge_plot)
dev.off()
## png 
##   2
### Exercise 4
# Fill colour per genotype label: two-letter A/C/G/T genotypes are red,
# single-letter A/C/G/T genotypes are blue, the D/I codes and "--" are green.
genotype_colours <- c(
  "AA" = "red", "CC" = "red", "GG" = "red", "TT" = "red", "AC" = "red",
  "AG" = "red", "AT" = "red", "CG" = "red", "CT" = "red", "GT" = "red",
  "A" = "blue", "C" = "blue", "G" = "blue", "T" = "blue",
  "D" = "green", "DD" = "green", "DI" = "green", "II" = "green",
  "I" = "green", "--" = "green"
)

# One panel per genotype, laid out in two columns; each panel gets its own
# y-axis range (scales = "free_y").
ggplot(data = SNPs, mapping = aes(x = chromosome, fill = genotype)) +
  geom_bar(position = "dodge2") +
  facet_wrap(vars(genotype), ncol = 2, scales = "free_y") +
  labs(
    title = "Total Number of SNPs for each Chromosome",
    x = "Chromosome Number",
    y = "Number of SNPs"
  ) +
  scale_fill_manual(values = genotype_colours)

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
### Exercise 5
# Fill colour per genotype label: two-letter A/C/G/T genotypes are red,
# single-letter A/C/G/T genotypes are blue, the D/I codes and "--" are green.
genotype_colours <- c(
  "AA" = "red", "CC" = "red", "GG" = "red", "TT" = "red", "AC" = "red",
  "AG" = "red", "AT" = "red", "CG" = "red", "CT" = "red", "GT" = "red",
  "A" = "blue", "C" = "blue", "G" = "blue", "T" = "blue",
  "D" = "green", "DD" = "green", "DI" = "green", "II" = "green",
  "I" = "green", "--" = "green"
)

# Faceted bar chart: one panel per genotype, two columns, free y-axes.
snp_facet_plot <- ggplot(SNPs, aes(x = chromosome, fill = genotype)) +
  geom_bar(position = "dodge2") +
  facet_wrap(~genotype, ncol = 2, scales = "free_y") +
  labs(
    title = "Total Number of SNPs for each Chromosome",
    x = "Chromosome Number",
    y = "Number of SNPs"
  ) +
  scale_fill_manual(values = genotype_colours)

# Convert this specific plot to an interactive plotly widget. Calling
# ggplotly() with no argument converts the last plot that was displayed, so
# invoking it before the plot is built depends on whatever was drawn earlier
# and leaves the plot below it rendered as a static image.
ggplotly(snp_facet_plot)

library(DT)
### Exercise 6
# Interactive table of the SNPs located on the Y chromosome.
# The `class` argument of datatable() sets the CSS class of the HTML table,
# so passing "Y Chromosome" there never filtered anything; select the rows
# explicitly instead of showing the first six rows of the whole data set.
y_snps <- subset(SNPs, chromosome == "Y")
# Drop unused factor levels: when columns are factors, a small subset still
# carries every level of the full data set, which inflates the object handed
# to the browser (presumably what triggered DT's "data is too big" warning).
y_snps <- droplevels(y_snps)
datatable(y_snps)
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html